套件載入
## Rows: 817741 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): TRANSACTION_DT, CUSTOMER_ID, AGE_GROUP, PIN_CODE, PRODUCT_ID
## dbl (4): PRODUCT_SUBCLASS, AMOUNT, ASSET, SALES_PRICE
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: The `...` argument of `group_indices()` is deprecated as of dplyr 1.0.0.
## ℹ Please `group_by()` first
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Package
#rm(list=ls(all=T))
options(digits=4, scipen=12)
library(dplyr)
library(ggplot2)
library(caTools)
library(ROCR)
library(googleVis)
##
## Welcome to googleVis version 0.7.3
##
## Please read Google's Terms of Use
## before you start using the package:
## https://developers.google.com/terms/
##
## Note, the plot method of googleVis will by default use
## the standard browser to display its output.
##
## See the googleVis package vignettes for more details,
## or visit https://mages.github.io/googleVis/.
##
## To suppress this message use:
## suppressPackageStartupMessages(library(googleVis))
library(chorddiag)
library(Cairo)
#library(mosaic)
X0<-X0 %>% filter(!age=="a99")
X = load("data/tf4.rdata")
B<-B %>% filter(!age=="a99")
group_by(B, age) %>% summarise(
recent=mean(r),
freq=mean(f),
money=mean(m),
size=n() ) %>%
mutate( revenue = size*money/1000 ) %>%
filter(size > 1) %>%
ggplot(aes(x=freq, y=money)) +
geom_point(aes(size=revenue, col=recent),alpha=0.5) +
scale_size(range=c(4,30)) +
scale_color_gradient(low="green",high="red") +
scale_x_log10() + scale_y_log10(limits=c(500,1500)) +
geom_text(aes(label = age ),size=3) +
theme_bw() + guides(size=F) +
labs(title="Customer Segements",
subtitle="(bubble_size:revenue_contribution; text:group_size)",
color="Recency") +
xlab("Frequency") + ylab("Average Transaction Amount")
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# 根據年齡分成兩個群組
B <- B %>%
mutate(AgeGroup = case_when(
age %in% c("a29","a34","a39","a59") ~ "重要挽留客戶",
age %in% c("a44", "a49", "a54") ~ "重要保持客戶",
TRUE ~ "Other"
))
# 根據年齡群組計算統計信息
summary_data <- B %>%
group_by(AgeGroup) %>%
summarise(n = n(), Buy = mean(Buy), Rev = mean(Rev))
# 繪製泡泡圖
p <- ggplot(summary_data, aes(Buy, Rev, size = n, label = AgeGroup)) +
geom_point(alpha = 0.5, color = 'gold') +
geom_text(size = 4) +
labs(title = "Age Group Comparison (size: no. customers)") +
xlab("Avg. Buying Probability") + ylab("Avg. Expected Revenue") +
scale_size(range = c(4, 20)) + theme_bw()
# 將 ggplot 圖轉換為 plotly 圖
ggplotly(p)
library(dplyr)
library(ggplot2)
library(plotly)
# 根據年齡分成兩個群組
B <- B %>%
mutate(AgeGroup = case_when(
age %in% c("a29","a34","a39","a59") ~ "重要挽留客戶",
age %in% c("a44", "a49", "a54") ~ "重要保持客戶",
)) %>%
filter(!is.na(AgeGroup)) # 排除沒有指定群組的觀測值
# 根據年齡群組計算統計信息
summary_data <- B %>%
group_by(AgeGroup) %>%
summarise(n = n(), Buy = mean(Buy), Rev = mean(Rev))
# 繪製泡泡圖
p <- ggplot(summary_data, aes(Buy, Rev, size = n, label = AgeGroup)) +
geom_point(alpha = 0.5, color = 'gold') +
geom_text(size = 4) +
labs(title = "Age Group Comparison (size: no. customers)") +
xlab("Avg. Buying Probability") + ylab("Avg. Expected Revenue") +
scale_size(range = c(4, 20)) + theme_bw()
# 將 ggplot 圖轉換為 plotly 圖
ggplotly(p)
margin = 0.17 # assume margin = 0.17
DP = function(x,m0,b0,a0) {m0*plogis((10/a0)*(x-b0))}
mm=c(0.30, 0.20)
bb=c( 20, 20)
aa=c( 40, 40)
X <- seq(0, 60, 2)
data <- do.call(rbind, lapply(1:length(mm), function(i) data.frame(
color = c('重要挽留客戶', '重要保持客戶')[i], Cost = X,
Gain = DP(X, mm[i], bb[i], aa[i])
))) %>%
data.frame
ggplot(data, aes(x = Cost, y = Gain, col = color)) +
geom_line(size = 1.5, alpha = 0.5) +
theme_bw() +
ggtitle("Prob. Function: f(x|m,b,a)")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
mm=c(0.20, 0.25)
bb=c( 15, 25)
aa=c( 40, 40)
X <- seq(0, 60, 2)
data <- do.call(rbind, lapply(1:length(mm), function(i) data.frame(
color = c('重要挽留客戶', '重要保持客戶')[i], Cost = X,
Gain = DP(X, mm[i], bb[i], aa[i])
))) %>%
data.frame
ggplot(data, aes(x = Cost, y = Gain, col = color)) +
geom_line(size = 1.5, alpha = 0.5) +
theme_bw() +
ggtitle("Prob. Function: f(x|m,b,a)")
mm=c(0.20, 0.25)
bb=c( 25, 30)
aa=c( 40, 40)
# 選擇符合條件的資料
selected_data <- B[B$age %in% c("a29", "a34", "a39"), ]
X = seq(10, 60, 1)
df = do.call(rbind, lapply(1:length(mm), function(i) {
sapply(X, function(x) {
dp = pmin(1-selected_data$Buy, DP(x,mm[i],bb[i],aa[i]))
eR = dp*selected_data$Rev*margin - x
c(i=i, x=x, eR.ALL=sum(eR), N=sum(eR>0), eR.SEL=sum(eR[eR > 0]) )
}) %>% t %>% data.frame
}))
df %>%
mutate_at(vars(eR.ALL, eR.SEL), function(y) round(y/1000)) %>%
gather('key','value',-i,-x) %>%
mutate(Instrument = paste0('I',i)) %>%
ggplot(aes(x=x, y=value, col=Instrument)) +
geom_hline(yintercept=0, linetype='dashed', col='blue') +
geom_line(size=1.5,alpha=0.5) +
xlab('工具選項(成本)') + ylab('預期收益($K)') +
ggtitle('行銷工具優化','假設行銷工具的效果是其成本的函數') +
facet_wrap(~key,ncol=1,scales='free_y') + theme_bw() -> p
plotly::ggplotly(p)
mm=c(0.20, 0.25)
bb=c( 25, 30)
aa=c( 40, 40)
# 選擇符合條件的資料
selected_data <- B[B$age %in% c("a49","a44","a54"), ]
X = seq(10, 60, 1)
df = do.call(rbind, lapply(1:length(mm), function(i) {
sapply(X, function(x) {
dp = pmin(1-selected_data$Buy, DP(x,mm[i],bb[i],aa[i]))
eR = dp*selected_data$Rev*margin - x
c(i=i, x=x, eR.ALL=sum(eR), N=sum(eR>0), eR.SEL=sum(eR[eR > 0]) )
}) %>% t %>% data.frame
}))
df %>%
mutate_at(vars(eR.ALL, eR.SEL), function(y) round(y/1000)) %>%
gather('key','value',-i,-x) %>%
mutate(Instrument = paste0('I',i)) %>%
ggplot(aes(x=x, y=value, col=Instrument)) +
geom_hline(yintercept=0, linetype='dashed', col='blue') +
geom_line(size=1.5,alpha=0.5) +
xlab('工具選項(成本)') + ylab('預期收益($K)') +
ggtitle('行銷工具優化','假設行銷工具的效果是其成本的函數') +
facet_wrap(~key,ncol=1,scales='free_y') + theme_bw() -> p
plotly::ggplotly(p)
library(dplyr)
mm=c(0.20, 0.25, 0.15)
bb=c( 25, 30, 15)
aa=c( 40, 40, 30)
cidx = lapply( # B's index for the 4 age groups
list(c("a29","a34","a39"),
c("a49","a44","a54"),c("a24","a64","a69")),
function(v) B$age %in% v)
X = seq(10, 60, 1)
df = do.call(rbind, lapply(1:length(mm), function(i) {
sapply(X, function(x) {
dp = pmin(1- B$Buy[ cidx[[i]] ] , DP(x,mm[i],bb[i],aa[i]))
eR = dp * B$Rev[ cidx[[i]] ] * margin - x
c(i=i, x=x, eR.ALL=sum(eR), N=sum(eR>0), eR.SEL=sum(eR[eR > 0]) )
}) %>% t %>% data.frame
}))
result = group_by(df, i) %>% top_n(1,eR.SEL)
print(result)
## # A tibble: 53 × 5
## # Groups: i [3]
## i x eR.ALL N eR.SEL
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 35 -81945. 3857 55266.
## 2 2 40 -55026. 3047 55254.
## 3 3 10 0 0 0
## 4 3 11 0 0 0
## 5 3 12 0 0 0
## 6 3 13 0 0 0
## 7 3 14 0 0 0
## 8 3 15 0 0 0
## 9 3 16 0 0 0
## 10 3 17 0 0 0
## # ℹ 43 more rows
# x eR.ALL N eR.SEL
#35 -93627 4002 56924 重要
#40 -55026 3047 55254 挽留
#Tafeng每個月的營收大約是2,500萬,獲利大約是390萬,一般零售商的行銷經費大約是獲利的5%~15%
#g = 0.156
#totalR = sum(Z0$price) # 102,585,246 / 4 = 25,646,311.5
#totalC = sum(Z0$cost) # 86,781,697 totalR - totalC # 15,803,549 / 4 = 3,950,887.25
g = 0.3 # 平均毛利率
N = 36 # 估計CLV期間(三年)
d = 0.01 # 資本利率
B$CLV = g * B$Rev * rowSums(sapply(
0:N, function(i) (B$Buy/(1+d))^i ) )
summary(B$CLV)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 35 296 471 1365 810 1145741
ggplot(B, aes(CLV)) +
geom_histogram(bins=30, fill="green",alpha=0.6) +
scale_x_log10() + facet_wrap(~age)
B <- B %>%
mutate(age_group = case_when(
age %in% c("a29", "a34", "a39", "a59") ~ "重要挽留客戶",
age %in% c("a49", "a44", "a54") ~ "重要保持客戶",
TRUE ~ "Other"
))
ggplot(B, aes(CLV)) +
geom_histogram(bins = 30, fill = "green", alpha = 0.6) +
scale_x_log10() +
facet_wrap(~age_group)
# MOSA = function(formula, data) mosaic(formula, data, shade=T,
# margins=c(0,1,0,0), labeling_args = list(rot_labels=c(0,0,0,0)),
# gp_labels=gpar(fontsize=9), legend_args=list(fontsize=9),
# gp_text=gpar(fontsize=7),labeling=labeling_residuals)
#
# X0$age <- as.character(X0$age)
# custom_groups <- list(
# c("a29", "a34", "a39", "a59"),
# c("a44", "a49", "a54"),
# c("a24", "a64", "a69","a99")
# )
#
# X0$age_group <- character(length(X0$age))
#
# for (i in seq_along(custom_groups)) {
# group <- custom_groups[[i]]
# X0$age_group[X0$age %in% group] <- paste(group, collapse = ",")
# }
#
#
# X0$wday = format(X0$date, "%a")
# MOSA(~age_group + wday, X0)
#
# #X0$wday = format(X0$date, "%a")
# #MOSA(~age+wday, X0)
# load(data/tf0.rdata)
# group_by(Z0, age) %>% summarise(sum(price)/sum(cost) - 1)
margin = 0.5 # assume margin = 0.17
DP = function(x,m0,b0,a0) {m0*plogis((10/a0)*(x-b0))}
# # 設定目標客群
# cidx = lapply( # B's index for the 4 age groups
# list(c("a29","a34","a39"),
# c("a49","a44","a54")),
# function(v) B$age %in% v)
# 設定目標客群
cidx = lapply( # B's index for the 4 age groups
list(c("a44","a49","a54")),
#list(c("a29","a34","a39")),
function(v) B$age %in% v)

#cidx <- list(c("a29", "a34", "a39"))
# 會員集點
#c("a29","a34","a39") m:0.2 b:15 a:40
# X:27 eR.ALL:836435 N:12991 eR.SEL:844141 增加回購:0.190
#c("a44","a49","a54") m:0.25 b:25 a:40
# X:38 eR.ALL:659212 N:8900 eR.SEL:673686 增加回購:0.240
# 團購
#c("a29","a34","a39") m:0.3 b:20 a:40
# X:34 eR.ALL:1318205 N:13084 eR.SEL:1327606 增加回購:0.291
#c("a44","a49","a54") m:0.2 b:20 a:40
# X:32 eR.ALL:518662 N:8889 eR.SEL:530438 增加回購:0.190
mm <- c(0.25, 0.2)
bb <- c(25, 20)
aa <- c(40, 40)
X <- seq(2, 100, 1)
df <- do.call(rbind, lapply(1:length(mm), function(i) {
sapply(X, function(x) {
dp <- pmin(1 - B$Buy[cidx[[1]]], DP(x, mm[i], bb[i], aa[i]))
dp.R <- dp * B$Rev[cidx[[1]]] # 預期總營收增額
eR <- dp * B$Rev[cidx[[1]]] * margin - x
c(i = i, x = x, eR.ALL = sum(eR), N = sum(eR > 0), eR.SEL = sum(eR[eR > 0]))
}) %>% t %>% data.frame
}))
result = group_by(df, i) %>% top_n(1,eR.SEL)
### 模擬最佳參數
# 設定目標客群
cidx <- lapply(
list(c("a29", "a34", "a39")),
#list(c("a49","a44","a54")),
function(v) B$age %in% v
)
mm <- seq(0.04, 0.25, 0.01)
#mm <- 0.25
#bb <- seq(10, 50, 5)
bb <- 20
aa <- 40
#aa <- seq(10, 50, 5)
X <- seq(2, 100, 1)
param_combinations <- expand.grid(mm = mm, bb = bb, aa = aa, X = X)
df <- do.call(rbind, lapply(1:nrow(param_combinations), function(i) {
mm_val <- param_combinations[i, 'mm']
bb_val <- param_combinations[i, 'bb']
aa_val <- param_combinations[i, 'aa']
x_val <- param_combinations[i, 'X']
dp <- pmin(1 - B$Buy[cidx[[1]]], DP(x_val, mm_val, bb_val, aa_val))
dp.R <- dp * B$Rev[cidx[[1]]] # 預期總營收增額
eR <- dp * B$Rev[cidx[[1]]] * margin - x_val
c(mm = mm_val, bb = bb_val, aa = aa_val, X = x_val, eR.ALL = sum(eR), N = sum(eR > 0), eR.SEL = sum(eR[eR > 0]))
}))
# 將結果存儲到 df 中
df <- as.data.frame(df)
# # 找到 "eR.SEL" 中的最大值的索引
# max_index <- which.max(df$eR.SEL)
#
# # 提取相應的行
# max_row <- df[max_index, ]
#
# # 顯示結果
# print(max_row
# 找到
top_five_indices <- order(df$eR.SEL, decreasing = TRUE)[1:5]
# 提取相應的行
top_five_rows <- df[top_five_indices, ]
# 顯示結果
print(top_five_rows)
## mm bb aa X eR.ALL N eR.SEL
## 704 0.25 20 40 33 1047850 12955 1058010
## 682 0.25 20 40 32 1047080 12991 1056572
## 726 0.25 20 40 34 1045603 12913 1056484
## 748 0.25 20 40 35 1040934 12878 1052584
## 660 0.25 20 40 31 1042574 13018 1051445